********************************************************************************

* Filename: Table_4_TeacherKnowledgeImpacts.do

********************************************************************************
*  This do-file is part of the collection of replication files for Schaffner, Glewwe, and Sharma, 
*  "Why Programs Fail:  Lessons for Improving Public Service Quality from a Mixed Methods Evaluation
*   of an Unsuccessful Teacher Training Program in Nepal"
*
*This do-file calculates:
*      Regression results for Table 4.
*
* Software version used: STATA/SE 18.0
********************************************************************************
*GLOBAL FILE PATH DEFINITIONS
* Set PBRfolder to the root of the replication package. The dataset and log
* folders are derived from it. Forward slashes are used as the directory
* separator because Stata accepts them on Windows, macOS and Linux alike,
* whereas backslashes only work on Windows.
	global PBRfolder "ADD FILE REFERENCE TO MAIN FOLDER HERE"
	global datasets "$PBRfolder/Datasets"
	global logs "$PBRfolder/Logs"

********************************************************************************
* SET-UP
* Start from a clean session, open the log for this table, and make the
* datasets folder the working directory so that data files can be referenced
* by bare name below.
	set more off
	clear all
	set varabbrev on
	capture log close   // close any log left open by a previous run; ignore the error if none is open
	log using "$logs/Table_4_calcs", replace
	cd "$datasets"

********************************************************************************

*MATH TEACHER SUBJECT KNOWLEDGE REGRESSION

********************************************************************************

	* READ IN MATH TEACHER RESPONSES TO "TEACHER EVALUATION OF MATH ASSESSMENT ITEMS"

		use Teachereval_math_c, clear

		* Build per-item analysis variables with names suited to looping and re-shaping.
		* For each of the 12 assessment items (index i):
		*   m_clear_i   = 1 if the teacher marked the question as clear, 0 otherwise, missing if no response
		*   m_correct_i = teacher picked the right answer (set to missing when no answer was given)
		*   m_approp_i  = 1 if the teacher marked the item as appropriate for testing the curriculum,
		*                 0 otherwise, missing if no response
		*   m_stupct_i  = teacher's guess of the percent of students who will answer correctly
		forvalues i = 1/12 {
			gen m_clear_`i' = t_math_`i'_a==1 if t_math_`i'_a != .   // question was clear to the teacher
			replace t_math_`i'_correct = . if t_math_`i'_b==.        // no answer chosen -> correctness undefined
			gen m_correct_`i' = t_math_`i'_correct                   // picked right answer
			* Missing responses are kept missing (as for m_clear) rather than being
			* counted as "not appropriate".
			gen m_approp_`i' = t_math_`i'_d==1 if t_math_`i'_d != .  // appropriate for testing curriculum
			gen m_stupct_`i' = t_math_`i'_c                          // guess about percent of students who will get correct
		}
			
		* Number of schools and number of evaluations per school.
		* NOTE: -unique- is a community-contributed command, not part of official Stata;
		* install it with "ssc install unique" if it is not available.
		unique schoolid // 189 schools, 246 records/teachers who filled out evaluations
		bysort schoolid: gen num = _N
			tab num  // 59 percent of schools have only one teacher math evaluation, but there are up to 4 in a school
			
		summ m_clear* m_correct* m_approp* m_stupct*
		
		* Calculate total score. Given low correct response rate and our assessment of low item quality, we chose not to use Item 10. 
		drop m_correct_10
		local math_items m_correct_1 m_correct_2 m_correct_3 m_correct_4 m_correct_5 m_correct_6 ///
				m_correct_7 m_correct_8 m_correct_9 m_correct_11 m_correct_12
		* The retained items are moved to the front of the dataset, in this order, so that the
		* range m_correct_1 - m_correct_12 used by the IRT command below covers exactly these 11 items.
		order `math_items'
		egen tot_score = rowtotal(`math_items')   // rowtotal() treats missing item scores as 0
		  
		tab tot_score  // raw scores
		
		* Fit a two-parameter logistic IRT model to the retained math items and
		* store each teacher's predicted latent trait.
		irt 2pl m_correct_1-m_correct_12
		predict TeacherIRTmath, latent
		summarize TeacherIRTmath, detail  /* 246 observations */
					
		* Attach school-level data (one school record to many teacher records)
		* and keep only teachers whose school is found in basicdata.
		
		sort schoolid
		merge m:1 schoolid using basicdata
		keep if _merge == 3
		tabulate studyarm
		* Treatment indicator: study arms 1 and 2 are coded 1, everything else 0.
		generate treat = inlist(studyarm, 1, 2)
		tabulate treat
			
		* Declare the survey design: schools are the sampling units, weighted by
		* sch_wght, stratified by the district-by-stratum identifier built here.
		generate dist_stratum = 10*district + stratum
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)

		* Standardize the test score relative to the control sample's
		* survey-weighted mean and standard deviation.
		* With over(treat) the estimates are ordered by ascending value of treat,
		* so element [1,1] of e(b) and of r(sd) belongs to the control group (treat==0).
		* The two constants are held in double-precision scalars rather than in
		* dataset variables, and are referenced through scalar() so that they can
		* never be confused with a (possibly abbreviated) variable name.
		svy, over(treat): mean TeacherIRTmath
				matrix b = e(b)
				scalar ctrl_mean_math = b[1,1]
				estat sd
				matrix sd = r(sd)
				scalar ctrl_sd_math = sd[1,1]
				gen TeacherIRTmath_std = (TeacherIRTmath - scalar(ctrl_mean_math)) / scalar(ctrl_sd_math)
		
		* Check: the standardized score should have mean 0 in the control group.
		summ TeacherIRTmath TeacherIRTmath_std
		svy, over(treat): mean TeacherIRTmath_std
		
		* ITT IMPACT REGRESSION (MATH SUBJECT KNOWLEDGE)
		* Survey-weighted regression of the standardized math score on the
		* treatment indicator, with district-by-stratum cell indicators.
		
		svy: regress TeacherIRTmath_std treat district#stratum
		estimates store math
		scalar rsquared = e(r2)
		
		* Report the treatment coefficient with its standard error, confidence
		* interval and significance stars, plus N and R-squared.
		etable, column(index) estimates(math) showstars showstarsnote keep(treat) ///
				cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) ///
				stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		display rsquared


********************************************************************************

*SCIENCE TEACHER SUBJECT KNOWLEDGE REGRESSION

********************************************************************************

	* READ IN SCIENCE TEACHER RESPONSES TO "TEACHER EVALUATION OF SCIENCE ASSESSMENT ITEMS"

		use Teachereval_science_c, clear
		
		* Count, per teacher, how many of the 12 item-correct indicators are missing.
		local sci_correct
		forvalues i = 1/12 {
			local sci_correct `sci_correct' t_science_`i'_correct
		}
		egen num_miss = rowmiss(`sci_correct')
		tab num_miss /* 233 with no missing, 1 with 1 missing */
		
		* NOTE(review): the loop below resets t_science_9_correct to missing whenever
		* t_science_9_b is missing. If the teacher flagged here left item 9 blank, that
		* would undo this zero-fill. Confirm from the data which outcome is intended;
		* the original statement order is kept unchanged.
		replace t_science_9_correct=0 if num_miss==1 & t_science_9_correct==. /* set answer correct to zero for nonresponse */
		
		* Build per-item analysis variables with names suited to looping and re-shaping.
		* For each of the 12 assessment items (index i):
		*   s_clear_i   = 1 if the teacher marked the question as clear, 0 otherwise, missing if no response
		*   s_correct_i = teacher picked the right answer (set to missing when no answer was given)
		*   s_approp_i  = 1 if the teacher marked the item as appropriate for testing the curriculum,
		*                 0 otherwise, missing if no response
		*   s_stupct_i  = teacher's guess of the percent of students who will answer correctly
		forvalues i = 1/12 {
			gen s_clear_`i' = t_science_`i'_a==1 if t_science_`i'_a != .   // question was clear to the teacher
			replace t_science_`i'_correct = . if t_science_`i'_b==.        // no answer chosen -> correctness undefined
			gen s_correct_`i' = t_science_`i'_correct                      // picked right answer
			* Missing responses are kept missing (as for s_clear) rather than being
			* counted as "not appropriate".
			gen s_approp_`i' = t_science_`i'_d==1 if t_science_`i'_d != .  // appropriate for testing curriculum
			gen s_stupct_`i' = t_science_`i'_c                             // guess about percent of students who will get correct
		}
			
		* Number of schools and number of evaluations per school.
		* NOTE: -unique- is a community-contributed command, not part of official Stata;
		* install it with "ssc install unique" if it is not available.
		unique schoolid // 186 schools, 234 records/teachers who filled out evaluations
		bysort schoolid: gen num = _N
			tab num  // 64 percent of schools have only one teacher science evaluation, but there are up to 4 in a school
			
		summ s_clear_* s_correct* s_approp* s_stupct*
		
		
		* Calculate total score. Given low correct response rate and our assessment of low item quality, we chose not to use item 6.
		drop s_correct_6
		local sci_items s_correct_1 s_correct_2 s_correct_3 s_correct_4 s_correct_5 s_correct_7 ///
				s_correct_8 s_correct_9 s_correct_10 s_correct_11 s_correct_12
		* The retained items are moved to the front of the dataset, in this order, so that the
		* range s_correct_1 - s_correct_12 used by the IRT command below covers exactly these 11 items.
		order `sci_items'
		egen tot_score = rowtotal(`sci_items')   // rowtotal() treats missing item scores as 0
		tab tot_score
			
		list tot_score if schoolid==16858173  /* value of 8 */
		drop if schoolid==16858173  /* This observation is the only one in its dist_stratum cell and is dropped because it cannot be used in svy commands below. */
			
		  
		tab tot_score  // raw scores
		
		* Fit a two-parameter logistic IRT model to the retained science items and
		* store each teacher's predicted latent trait.
		irt 2pl s_correct_1-s_correct_12
		predict TeacherIRTsci, latent
		* NOTE(review): the original comment on the next line read "246 observations",
		* which matches the math sample and looks carried over; confirm the science
		* count from the log.
		summarize TeacherIRTsci, detail
					
		* Attach school-level data (one school record to many teacher records)
		* and keep only teachers whose school is found in basicdata.
		
		sort schoolid
		merge m:1 schoolid using basicdata
		keep if _merge == 3
		tabulate studyarm
		* Treatment indicator: study arms 1 and 2 are coded 1, everything else 0.
		generate treat = inlist(studyarm, 1, 2)
		tabulate treat
			
		* Declare the survey design: schools are the sampling units, weighted by
		* sch_wght, stratified by the district-by-stratum identifier built here.
		generate dist_stratum = 10*district + stratum
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)

		* Standardize the test score relative to the control sample's
		* survey-weighted mean and standard deviation.
		* With over(treat) the estimates are ordered by ascending value of treat,
		* so element [1,1] of e(b) and of r(sd) belongs to the control group (treat==0).
		* The two constants are held in double-precision scalars rather than in
		* dataset variables, and are referenced through scalar() so that they can
		* never be confused with a (possibly abbreviated) variable name.
		svy, over(treat): mean TeacherIRTsci
				matrix b = e(b)
				scalar ctrl_mean_sci = b[1,1]
				estat sd
				matrix sd = r(sd)
				scalar ctrl_sd_sci = sd[1,1]
				gen TeacherIRTsci_std = (TeacherIRTsci - scalar(ctrl_mean_sci)) / scalar(ctrl_sd_sci)
		
		* Check: the standardized score should have mean 0 in the control group.
		summ TeacherIRTsci TeacherIRTsci_std
		svy, over(treat): mean TeacherIRTsci_std
		
		* ITT IMPACT REGRESSION (SCIENCE SUBJECT KNOWLEDGE)
		* Survey-weighted regression of the standardized science score on the
		* treatment indicator, with district-by-stratum cell indicators.
		
		svy: regress TeacherIRTsci_std treat district#stratum
		estimates store science
		scalar rsquared2 = e(r2)
		
		* Report the treatment coefficient with its standard error, confidence
		* interval and significance stars, plus N and R-squared.
		etable, column(index) estimates(science) showstars showstarsnote keep(treat) ///
				cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) ///
				stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		display rsquared2

* Close the log opened in the set-up section.
log close



		
	